/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search;

import java.util.HashMap;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.HighlightParams;
import org.apache.solr.util.TestHarness;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestComplexPhraseQParserPlugin extends SolrTestCaseJ4 {

  /**
   * Calls {@code initCore} with the {@code solrconfig.xml} configuration and the {@code
   * schema15.xml} schema before any test of this class runs.
   */
  @BeforeClass
  public static void beforeClass() throws Exception {
    final String configFile = "solrconfig.xml";
    final String schemaFile = "schema15.xml";
    initCore(configFile, schemaFile);
  }

  /**
   * Starts every test from an empty index.
   *
   * <p>All tests in this class run against the same core and reuse the same document ids. Without
   * a cleanup step, documents indexed by one test remain visible to the next one, so the
   * assertions would only hold as long as later tests happen to overwrite every leftover id.
   */
  @Override
  public void setUp() throws Exception {
    super.setUp();
    // Drop whatever a previously executed test indexed and make the deletion visible to searches.
    clearIndex();
    assertU(commit());
  }

  /** Delegates to the superclass teardown; this class adds no cleanup of its own. */
  @Override
  public void tearDown() throws Exception {
    super.tearDown();
  }

  /**
   * Runs complex phrase queries that carry no field prefix against three documents that only
   * populate the {@code text} field, so the queries rely on the configured default field
   * (presumably {@code text} - confirm against the test configuration).
   */
  @Test
  public void testDefaultField() {
    // Index the three name variants under ids 1..3.
    final String[] names = {"john smith", "johathon smith", "john percival smith"};
    for (int i = 0; i < names.length; i++) {
      assertU(adoc("text", names[i], "id", String.valueOf(i + 1)));
    }
    assertU(commit());
    assertU(optimize());

    // XPath expressions reused by the assertions below.
    final String oneHit = "//result[@numFound='1']";
    final String twoHits = "//result[@numFound='2']";
    final String threeHits = "//result[@numFound='3']";
    final String doc1 = "//doc[./str[@name='id']='1']";
    final String doc2 = "//doc[./str[@name='id']='2']";
    final String doc3 = "//doc[./str[@name='id']='3']";

    // plain two-term phrase
    assertQ(req("q", "{!complexphrase} \"john smith\""), oneHit, doc1);

    // wildcard and fuzzy terms inside the phrase
    assertQ(req("q", "{!complexphrase} \"j* smyth~\""), twoHits, doc1, doc2);

    // boolean exclusion inside the phrase
    assertQ(req("q", "{!complexphrase} \"(jo* -john) smith\""), oneHit, doc2);

    // slop of 2 also admits "john percival smith"
    assertQ(req("q", "{!complexphrase} \"jo* smith\"~2"), threeHits, doc1, doc2, doc3);

    // range clause inside the phrase
    assertQ(req("q", "{!complexphrase} \"jo* [sma TO smz]\""), twoHits, doc1, doc2);

    // single-term "phrase"
    assertQ(req("q", "{!complexphrase} \"john\""), twoHits, doc1, doc3);

    // alternatives (OR) in the first phrase position
    assertQ(req("q", "{!complexphrase} \"(john johathon) smith\""), twoHits, doc1, doc2);
  }

  /**
   * Exercises the complex phrase parser on the {@code name} field, mostly through a request
   * factory whose default parameters select the parser via {@code defType} and restrict the
   * returned fields to {@code id}. The last two assertions expect a {@code BAD_REQUEST} error
   * for query strings that try to embed a sub-query.
   */
  @Test
  public void test() {
    // Request defaults: complex phrase parser as defType, only the id field returned.
    HashMap<String, String> defaults = new HashMap<>();
    defaults.put(QueryParsing.DEFTYPE, ComplexPhraseQParserPlugin.NAME);
    defaults.put(CommonParams.FL, "id");
    TestHarness.LocalRequestFactory requests = h.getRequestFactory("", 0, 200, defaults);

    // Index the three name variants under ids 1..3.
    final String[] names = {"john smith", "johathon smith", "john percival smith"};
    for (int i = 0; i < names.length; i++) {
      assertU(adoc("name", names[i], "id", String.valueOf(i + 1)));
    }
    assertU(commit());
    assertU(optimize());

    // XPath expressions reused by the assertions below.
    final String doc1 = "//doc[./str[@name='id']='1']";
    final String doc2 = "//doc[./str[@name='id']='2']";
    final String doc3 = "//doc[./str[@name='id']='3']";
    final String oneHit = "//result[@numFound='1']";
    final String twoHits = "//result[@numFound='2']";
    final String threeHits = "//result[@numFound='3']";

    assertQ(
        "Simple multi-term still works",
        requests.makeRequest("name:\"john smith\""),
        doc1,
        oneHit);

    // Same query, but selecting the parser through local params instead of defType.
    assertQ(req("q", "{!complexphrase} name:\"john smith\""), doc1, oneHit);

    assertQ(
        "wildcards and fuzzies are OK in phrases",
        requests.makeRequest("name:\"j* smyth~\""),
        doc1,
        doc2,
        twoHits);

    assertQ(
        "boolean logic works",
        requests.makeRequest("name:\"(jo* -john) smith\""),
        doc2,
        oneHit);

    assertQ(
        "position logic works",
        requests.makeRequest("name:\"jo*  smith\"~2"),
        doc1,
        doc2,
        doc3,
        threeHits);

    assertQ(
        "range queries supported",
        requests.makeRequest("name:\"jo* [sma TO smz]\""),
        doc1,
        doc2,
        twoHits);

    assertQ(
        "Simple single-term still works",
        requests.makeRequest("name:\"john\""),
        doc1,
        doc3,
        twoHits);

    assertQ(
        "OR inside phrase works",
        requests.makeRequest("name:\"(john johathon) smith\""),
        doc1,
        doc2,
        twoHits);

    // Both sub-query spellings are expected to fail with a SyntaxError reported as BAD_REQUEST.
    final SolrException.ErrorCode badRequest = SolrException.ErrorCode.BAD_REQUEST;
    assertQEx(
        "don't parse subqueries",
        "SyntaxError",
        requests.makeRequest("_query_:\"{!prefix f=name v=smi}\""),
        badRequest);
    assertQEx(
        "don't parse subqueries",
        "SyntaxError",
        requests.makeRequest("{!prefix f=name v=smi}"),
        badRequest);
  }

  /**
   * Checks the highlighting section of responses produced with the complex phrase parser.
   *
   * <p>Requests are built with highlighting enabled on the {@code name} field, the phrase
   * highlighter switched on, field matching switched off and a fragment size of 0. The
   * assertions compare the exact highlighted strings, including the {@code <em>} markers.
   */
  @Test
  public void testPhraseHighlighter() {
    HashMap<String, String> args = new HashMap<>();

    args.put(QueryParsing.DEFTYPE, ComplexPhraseQParserPlugin.NAME);
    args.put(CommonParams.FL, "id");
    args.put(HighlightParams.HIGHLIGHT, Boolean.TRUE.toString());
    args.put(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.TRUE.toString());
    args.put(HighlightParams.FIELD_MATCH, Boolean.FALSE.toString());

    args.put(HighlightParams.FRAGSIZE, String.valueOf(0));
    args.put(HighlightParams.FIELDS, "name");

    // args is not modified after this point, so a single factory serves every request below.
    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory("", 0, 200, args);

    assertU(adoc("name", "john smith smith john", "id", "1"));
    assertU(adoc("name", "johathon smith smith johathon", "id", "2"));
    assertU(adoc("name", "john percival smith", "id", "3"));
    assertU(commit());
    assertU(optimize());

    // Sanity check: all three documents are searchable before looking at highlights.
    assertQ(
        "range queries supported",
        sumLRF.makeRequest("name:[sma TO smz]"),
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']",
        "//doc[./str[@name='id']='3']",
        "//result[@numFound='3']");

    // Only the terms that take part in the phrase match are wrapped in <em>; the trailing
    // "smith john" / "smith johathon" stay unmarked.
    assertQ(
        "PhraseHighlighter=true Test",
        sumLRF.makeRequest("name:\"(john johathon) smith\""),
        "//lst[@name='highlighting']/lst[@name='1']",
        "//lst[@name='1']/arr[@name='name']/str[.='<em>john</em> <em>smith</em> smith john']",
        "//lst[@name='highlighting']/lst[@name='2']",
        "//lst[@name='2']/arr[@name='name']/str[.='<em>johathon</em> <em>smith</em> smith johathon']");

    /* no longer supported since Lucene 9.5 but that's okay!
    args.put(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.FALSE.toString());
    sumLRF = h.getRequestFactory("", 0, 200, args);
    assertQ(
        "PhraseHighlighter=false Test",
        sumLRF.makeRequest("name:\"(john johathon) smith\""),
        "//lst[@name='highlighting']/lst[@name='1']",
        "//lst[@name='1']/arr[@name='name']/str[.='<em>john</em> <em>smith</em> <em>smith</em> <em>john</em>']",
        "//lst[@name='highlighting']/lst[@name='2']",
        "//lst[@name='2']/arr[@name='name']/str[.='<em>johathon</em> <em>smith</em> <em>smith</em> <em>johathon</em>']");
    */

    // A plain prefix query marks every occurrence of a matching term.
    assertQ(
        "Highlight Plain Prefix Query Test",
        sumLRF.makeRequest("name:jo*"),
        "//lst[@name='highlighting']/lst[@name='1']",
        "//lst[@name='1']/arr[@name='name']/str[.='<em>john</em> smith smith <em>john</em>']",
        "//lst[@name='highlighting']/lst[@name='2']",
        "//lst[@name='2']/arr[@name='name']/str[.='<em>johathon</em> smith smith <em>johathon</em>']",
        "//lst[@name='highlighting']/lst[@name='3']",
        "//lst[@name='3']/arr[@name='name']/str[.='<em>john</em> percival smith']");
  }

  /**
   * Combines phrases on two different fields ({@code name} and {@code text}) with {@code AND} in
   * a single complex phrase query, with and without the {@code inOrder} local param.
   */
  @Test
  public void testMultipleFields() {
    // Documents 1/2 and 3/4 carry mirrored content in the text and name fields.
    assertU(adoc("text", "protein digest", "name", "dna rules", "id", "1"));
    assertU(adoc("text", "digest protein", "name", "rna is the workhorse", "id", "2"));
    assertU(adoc("text", "dna rules", "name", "protein digest", "id", "3"));
    assertU(adoc("text", "dna really rules", "name", "digest protein", "id", "4"));
    assertU(commit());
    assertU(optimize());

    // XPath expressions reused by the assertions below.
    final String oneHit = "//result[@numFound='1']";
    final String twoHits = "//result[@numFound='2']";
    final String doc1 = "//doc[./str[@name='id']='1']";
    final String doc3 = "//doc[./str[@name='id']='3']";
    final String doc4 = "//doc[./str[@name='id']='4']";

    // exact phrases on both fields
    final String exactPhrases = "{!complexphrase} name:\"protein digest\" AND text:\"dna rules\"";
    assertQ(req("q", exactPhrases), oneHit, doc3);

    // wildcard phrases on both fields
    final String wildcardPhrases = "{!complexphrase} name:\"prot* dige*\" AND text:\"d* r*\"";
    assertQ(req("q", wildcardPhrases), oneHit, doc3);

    // unordered matching, quoted local param value
    final String unorderedQuoted =
        "{!complexphrase inOrder=\"false\"} name:\"dna* rule*\" AND text:\"prot* diges*\"";
    assertQ(req("q", unorderedQuoted), oneHit, doc1);

    // unordered matching with slop on the text phrase
    final String unorderedWithSlop =
        "{!complexphrase inOrder=false} name:\"protein digest\" AND text:\"dna rules\"~2";
    assertQ(req("q", unorderedWithSlop), twoHits, doc3, doc4);

    // explicitly ordered matching
    final String orderedQuoted =
        "{!complexphrase inOrder=\"true\"} name:\"protein digest\" AND text:\"dna rules\"";
    assertQ(req("q", orderedQuoted), oneHit, doc3);
  }

  /**
   * Compares ordered (default) and unordered ({@code inOrder=false}) phrase matching, both on the
   * field used when the query has no field prefix and on the explicitly named {@code name} field.
   *
   * <p>Documents 1 and 2 hold the two word orders in {@code text}; documents 3 and 4 hold them in
   * {@code name}.
   */
  @Test
  public void testUnorderedPhraseQuery() {

    assertU(adoc("text", "protein digest", "id", "1"));
    assertU(adoc("text", "digest protein", "id", "2"));

    assertU(adoc("name", "protein digest", "id", "3"));
    assertU(adoc("name", "digest protein", "id", "4"));

    assertU(commit());
    assertU(optimize());

    // ordered phrase query returns only the first document
    assertQ(
        req("q", "{!complexphrase} \"protein digest\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='1']");

    assertQ(
        req("q", "{!complexphrase} \"pro* di*\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='1']");

    assertQ(
        req("q", "{!complexphrase} name:\"protein digest\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='3']");

    assertQ(
        req("q", "{!complexphrase} name:\"pro* di*\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='3']");

    // unordered phrase query returns two documents.
    assertQ(
        req("q", "{!complexphrase inOrder=false} \"digest protein\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    assertQ(
        req("q", "{!complexphrase inOrder=false} \"di* pro*\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    assertQ(
        req("q", "{!complexphrase inOrder=false} name:\"digest protein\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='3']",
        "//doc[./str[@name='id']='4']");

    assertQ(
        req("q", "{!complexphrase inOrder=false} name:\"di* pro*\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='3']",
        "//doc[./str[@name='id']='4']");

    // inOrder parameter can be defined with local params syntax.
    assertQ(
        req("q", "{!complexphrase inOrder=false} \"di* pro*\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    // With inOrder=true only "digest protein" (id 2) has the terms in the queried order; check
    // the document itself and not just the hit count.
    assertQ(
        req("q", "{!complexphrase inOrder=true} \"di* pro*\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='2']");

    // inOrder and df parameters can be defined with local params syntax.
    assertQ(
        req("q", "{!complexphrase inOrder=false df=name} \"di* pro*\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='3']",
        "//doc[./str[@name='id']='4']");
  }

  /**
   * Regression test: the query "sulfur-reducing bacteria" was crashing due to the dash inside the
   * phrase. Besides not failing, the query must return the expected documents for both the
   * {@code text} and the {@code name} field.
   */
  @Test
  public void testHyphenInPhrase() {

    assertU(adoc("text", "sulfur-reducing bacteria", "id", "1"));
    assertU(adoc("text", "sulfur reducing bacteria", "id", "2"));

    assertU(adoc("name", "sulfur-reducing bacteria", "id", "3"));
    assertU(adoc("name", "sulfur reducing bacteria", "id", "4"));

    assertU(commit());
    assertU(optimize());

    // Without a field prefix both spellings (with and without the dash) are found.
    assertQ(
        req("q", "{!complexphrase} \"sulfur-reducing bacteria\""),
        "//result[@numFound='2']",
        "//doc[./str[@name='id']='1']",
        "//doc[./str[@name='id']='2']");

    // the analysis for "name" currently does not break on "-" (only whitespace) and thus only
    // matches one doc: the one whose name contains the hyphenated token (id 3)
    assertQ(
        req("q", "{!complexphrase} name:\"sulfur-reducing bacteria\""),
        "//result[@numFound='1']",
        "//doc[./str[@name='id']='3']");
  }
}
